## pipeline script for MethyPattern align in to genome feature
## author: Yaping Liu  lyping1986@gmail.com

my $data_dir = "/home/uec-02/shared/research/yaping/code/GNOME_seq/MethyPatternFeatureWalker/H1_hg19/MethyPatternResult/promoter/";
my $feature_dir = "/home/uec-02/shared/research/yaping/code/GNOME_seq/MethyPatternFeatureWalker/H1_hg19/location_data/";
my $plot_dir = "/home/uec-02/shared/research/yaping/code/GNOME_seq/MethyPatternFeatureWalker/H1_hg19/plot/promoter/";
my $genome_assembly = "hg19";
my $bissnp_dir = "/home/uec-00/yapingli/software/BisSNP/BisSNP-0.40.1.jar";
my $R_plot_script = "/home/uec-00/yapingli/code/mytools/R/MethyPatternFeaturePlot.R";
my $bam_file = "/home/uec-00/yapingli/data/H1_JuengSoo/fullBam/H1_C0B20ACXX_5_KEL656A85.hg19_rCRSchrm.fa.mdups.reheader.bam";
my $reference_seq;
my $dbsnp;
if($genome_assembly eq "hg19"){
	$reference_seq = "/home/uec-00/yapingli/data/genome/hg19_rCRSchrm.fa";
	$dbsnp = "/home/uec-00/yapingli/software/gatk_read/Sting/resources/dbsnp_132.hg19.sort.rod";
}
else{
	$reference_seq = "/home/uec-00/yapingli/data/genome/Homo_sapiens_assembly18_without_random.sort.fasta";
	$dbsnp = "/home/uec-00/yapingli/software/gatk_read/Sting/resources/dbsnp_130_hg18.sort.rod";
}

my $data_matrix_scale = 2000;
my $confidance_level = 20;
my $feature_name = 1;
my $paired_end = "paired";
my $plot_step = 20;
my $plot_scale = 1000;
my $plot_axis_step = 500;
my $smooth = 0;

my $count = 1;
my $use_age = "USAGE: perl MethyPatternFeaturePipeline.pl data_dir feature_dir plot_dir";
if($#ARGV <3){
	print "$use_age\n";
	exit(1);
}

opendir(DH,$feature_dir);
foreach my $feature_file(readdir(DH)){
	if($feature_file =~ /(\S+)\.minusUpstream0\.plusDownstream0(\S+sort.bed)/){
		my $prefix = $1;
		my $suffix = $2;
		my $region_file = $prefix.".minusUpstream2000\.plusDownstream2000".$suffix;
		my $header = "#PBS -q laird\n";
			$header .= "#PBS -l walltime=168:00:00,mem=14g,nodes=1\n";
			$header .= "#PBS -N $feature_file\n";
			$header .= "#PBS -j oe\n";
			$header .= "cd \$PBS_O_WORKDIR\n";
			my $java_cmd = "java -Xmx14g -jar $bissnp_dir -T MethyPatternFeature";
			$java_cmd .= "-R $reference_seq -I $bam_file -D $dbsnp -stand_call_conf 20 -stand_emit_conf 0 -dt NONE -single_sample gnome_test -mmq 30 -mbq 0 -orientated ";
			if($paired_end eq "paired"){
				$java_cmd .= "-pem ";
			}
			$java_cmd .= "-feature $feature_name " ;
			$java_cmd = $java_cmd."-B:$feature_name,bed $feature_dir"."$feature_file ";
			$java_cmd = $java_cmd."-L $feature_dir"."$feature_file ";	
			my $gch_file = $data_dir."$genome_assembly.gch.$data_matrix_scale.txt";
			$java_cmd = $java_cmd."-gchFile $gch_file ";
			my $wcg_file = $data_dir."$genome_assembly.wcg.$data_matrix_scale.txt";
			$java_cmd = $java_cmd."-wcgFile $wcg_file ";
			my $hcg_file = $data_dir."$genome_assembly.hcg.$data_matrix_scale.txt";
			$java_cmd = $java_cmd."-hcgFile $hcg_file ";
			$header .= "$java_cmd\n";
			
			my $R_cmd_plot = "/home/uec-00/shared/production/software/R-2.14.1/R-2.14.1/bin/R --no-restore --no-save --args wd=$plot_dir prefix=$prefix gchfn=$gch_file hcgfn=$hcg_file step=$plot_step scale=$plot_scale axistep=$plot_axis_step smooth=$smooth < $R_plot_script \n";
			$header .= "$R_cmd_plot ";			
			my $outfileName = "submitPBS" . $count;
			open(OUT, ">$outfileName") or die "can not open file:$!";
			print OUT $header;
			close OUT;
			system("qsub $outfileName");
			$count++;
			print "$feature_file is submitted\n";
			unlink $outfileName;
	}
}